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FIGURE 1 

TGGCCTCCCCAGCTTGCCAGGCACAAGGCTGAGCGGGAGGAAGCGAGAGGCATCTA 

AGCAGGCAGTGTTTTGCCTTCACCCCAAGTGACCATGAGAGGTGCCACGCGAGTCTC 

AATCATGCTCCTGCTAGTAACTGTGTCTGACTGTGCTGTGATCACAGGGGCCTGTGA 

GCGGGATGTCCAGTGTGGGGCAGGCACCTGCTGTGCCATCAGCCTGTGGCTTCGAGG 

GCTGCGGATGTGCACCCCGCTGGGGCGGGAAGGCGAGGAGTGCCACCCCGGCAGCC 

ACAAGGTCCCCTTCTTCAGGAAACGCAAGCACCACACCTGTCCTTGCTTGCCCAACC 

TGCTGTGCTCCAGGTTCCCGGACGGCAGGTACCGCTGCTCCATGGACTTGAAGAACA 

TCAATTTTTAGGCGCTTGCCTGGTCTCAGGATACCCACCATCCTTTTCCTGAGCACAG 

CCTGGATTTTTATTTCTGCCATGAAACCCAGCTCCCATGACTCTCCCAGTCCCTACAC 

TGACTACCCTGATCTCTCTTGTCTAGTACGCACATATGCACACAGGCAGACATACCT 

CCCATCATGACATGGTCCCCAGGCTGGCCTGAGGATGTCACAGCTTGAGGCTGTGGT 

GTGAAAGGTGGCCAGCCTGGTTCTCTTCCCTGCTCAGGCTGCCAGAGAGGTGGTAAA 

TGGCAGAAAGGACATTCCCCCTCCCCTCCCCAGGTGACCTGCTCTCTTTCCTGGGCCC 

TGCCCCTCTCCCCACATGTATCCCTCGGTCTGAATTAGACATTCCTGGGCACAGGCTC 

TTGGGTGCATTGCTCAGAGTCCCAGGTCCTGGCCTGACCCTCAGGCCCTTCACGTGA 

GGTCTGTGAGGACCAATTTGTGGGTAGTTCATCTTCCCTCGATTGGTTAACTCCTTAG 

TTTCAGACCACAGACTCAAGATTGGCTCTTCCCAGAGGGCAGCAGACAGTCACCCCA 

AGGCAGGTGTAGGGAGCCCAGGGAGGCCAATCAGCCCCCTGAAGACTCTGGTCCCA 

GTCAGCCTGTGGCTTGTGGCCTGTGACCTGTGACCTTCTGCCAGAATTGTCATGCCTC 

TGAGGCCCCCTCTTACCACACTTTACCAGTTAACCACTGAAGCCCCCAATTCCCACA 

GCTTTTCCATTAAAATGCAAATGGTGGTGGTTCAATCTAATCTGATATTGACATATTA 

GAAGGCAATTAGGGTGTTTCCTTAAACAACTCCTTTCCAAGGATCAGCCCTGAGAGC 

AGGTTGGTGACTTTGAGGAGGGCAGTCCTCTGTCCAGATTGGGGTGGGAGCAAGGG 

ACAGGGAGCAGGGCAGGGGCTGAAAGGGGCACTGATTCAGACCAGGGAGGCAACT 

ACACACCAACATGCTGGCTTTAGAATAAAAGCACCAACTGAAAAAA 



• 



# 



FIGURE 2 

MRGATRVSIMLLLVTVSDCAVITGACERDVQCGAGTCCAISLWLRGLRMCTPLGREGEE 
C 

HPGSHKVPFFRKRKHHTCPCLPNLLCSRFPDGRYRCSMDLKNINF 



Important features: 

Signal peptide: 
1-19 



N-myristoylation sites: 
33 
35 
46 
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PR0 XXXXXXXXXXXXXXX (Length = 1 5 amino acids) 

Comparison Protein XXXXXYYYYYYY (Length = 12 amino acids) 

% amino acid sequence identity = 

(the number of identically matching amino acid residues between the two polypeptide sequences 
as determined by ALIGN-2) divided by (the total number of amino acid residues of the PRO 
polypeptide) = 

5 divided by 15 = 33.3% 
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PR 0 XXXXXXXXXX (Length = 1 0 amino acids) 

Comparison Protein XXXXXYYYYYYZZYZ (Length = 15 amino acids) 

% amino acid sequence identity = 

(the number of identically matching amino acid residues between the two polypeptide sequences 
as determined by ALIGN-2) divided by (the total number of amino acid residues of the PRO 
polypeptide) = 



5 divided by 10 = 50% 
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PRO-DNA >MNnWNNNNNNNNN (Length = 14 nucleotides) 

Comparison DNA NNNNNNLLLLLLLLLL (Length = 16 nucleotides) 

% nucleic acid sequence identity = 



(the number of identically matching nucleotides between the two nucleic acid sequences as 
determined by ALIGN-2) divided by (the total number of nucleotides of the PRO-DNA nucleic 
acid sequence) = 

6 divided by 14 = 42.9% 
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PRO-DNA 
NNNNNNNNNNNN (Length = 12 nucleotides) 

Comparison DNA NNNNLLLVV (Length = 9 nucleotides) 

% nucleic acid sequence identity = 



(the number of identically matching nucleotides between the two nucleic acid sequences as 
determined by ALIGN-2) divided by (the total number of nucleotides of the PRO-DNA nucleic 
acid sequence) = 



4 divided by 12 = 33.3% 



FIGURE 4 

TGGCTCCCCAGCTTGCCAGGCACAAGGCTGAGCTGGAGGAAGCGAGANGCATCTAA 
GCAG 

GCAGTGTTTTGCCTTCACCCCAAGTGACCATGAGAGGTGCCACGCGAGTCTCAATCA 
TGC 

TCCTCCTAGTAACTGTGTCTGACTGTGCTGTGATCACAGGGGCCTGTGAGCGGGATG 

AGTGTGGGGCAGGCACCTGCTGTGCCATCAGCCTGTGGCTTCGAGGGCTGCGGATGT 
GCA 

CCCCGCTGGGGCGGGAAGGCGAGGAGTGCCACCCCGGCAGCCACAAGGTCCCCTTC 
TTCA 

GGAAACGCAAGCACCACACCTGTCTTGTTGCCCAACCTGCTGTGCTCCAGTTCCGGA 
CGG 

CAGTACGCTGCTCA 
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FIGURE 1 6 A-C 
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GTCATCACAGGGGCCTCTGAGCGGGATGTCCAGTGTGGGGCAGGCACCTG 200 
viTGACERDVQCGArTr 

ctgtgccatcagcctgtggcttcgagggctgcggatgtgcaccccgctgS 
laislwlrglrmctpt 

ggcgggaaggcgaggagtgccaccccggcagccacaaggtccccttcttc 
^kegeechpgshkvpfp 

AGGAAACGCAAGCACCACACCTGTCCTTCCTTCCCCAACCTCCTGTGCTC 
RKRKHHTCPCLPNI. I rc 

CAGGTTCCCGGACGGCAGGTACCGCTGCTCCATCGACTTCAAGMCATCA 
Kb PDGRYRC 



ATTT 
N F 



S M D L K N 



400 



'TTAGGCGCTTGCCTGGTCTCAGGATACCCACCATCC 



TTTTCCTGAG 



CACAGCCTGGATTTTTATTTCTGCCATCAAACCCAGCTCCCATCACTCTC 
CCAGTCCCTACACTGACTACCCTCATCTCTCTTCTCTAGTACGCAcS 
GCACACAGGCAGACATACCTCCCATCATGACATGGTCCCCAGGCTGGCCT fino 

gaggatgtcacagcttcaggctct^tctcaaaggtggccaS 
tctoccctgctcaggctgccagagaggtggtaaatcgcagaaaS?!?? 
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normalized luciferase activity 




• 



FIGURE 18 



□ 

hi 




FIGURE 20 A 



/*
 * Residue substitution scores, indexed [first letter - 'A'][second letter - 'A'].
 *
 * C-C increased from 12 to 15
 * Z is average of EQ
 * B is average of ND
 * match with stop is _M; stop-stop = 0; J (joker) match = 0
 */
#define _M	-8	/* value of a match with a stop */

int	_day[26][26] = {
/*	     A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
/* A */	{ 2, 0,-2, 0, 0,-4, 1,-1,-1, 0,-1,-2,-1, 0,_M, 1, 0,-2, 1, 1, 0, 0,-6, 0,-3, 0},
/* B */	{ 0, 3,-4, 3, 2,-5, 0, 1,-2, 0, 0,-3,-2, 2,_M,-1, 1, 0, 0, 0, 0,-2,-5, 0,-3, 1},
/* C */	{-2,-4,15,-5,-5,-4,-3,-3,-2, 0,-5,-6,-5,-4,_M,-3,-5,-4, 0,-2, 0,-2,-8, 0, 0,-5},
/* D */	{ 0, 3,-5, 4, 3,-6, 1, 1,-2, 0, 0,-4,-3, 2,_M,-1, 2,-1, 0, 0, 0,-2,-7, 0,-4, 2},
/* E */	{ 0, 2,-5, 3, 4,-5, 0, 1,-2, 0, 0,-3,-2, 1,_M,-1, 2,-1, 0, 0, 0,-2,-7, 0,-4, 3},
/* F */	{-4,-5,-4,-6,-5, 9,-5,-2, 1, 0,-5, 2, 0,-4,_M,-5,-5,-4,-3,-3, 0,-1, 0, 0, 7,-5},
/* G */	{ 1, 0,-3, 1, 0,-5, 5,-2,-3, 0,-2,-4,-3, 0,_M,-1,-1,-3, 1, 0, 0,-1,-7, 0,-5, 0},
/* H */	{-1, 1,-3, 1, 1,-2,-2, 6,-2, 0, 0,-2,-2, 2,_M, 0, 3, 2,-1,-1, 0,-2,-3, 0, 0, 2},
/* I */	{-1,-2,-2,-2,-2, 1,-3,-2, 5, 0,-2, 2, 2,-2,_M,-2,-2,-2,-1, 0, 0, 4,-5, 0,-1,-2},
/* J */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* K */	{-1, 0,-5, 0, 0,-5,-2, 0,-2, 0, 5,-3, 0, 1,_M,-1, 1, 3, 0, 0, 0,-2,-3, 0,-4, 0},
/* L */	{-2,-3,-6,-4,-3, 2,-4,-2, 2, 0,-3, 6, 4,-3,_M,-3,-2,-3,-3,-1, 0, 2,-2, 0,-1,-2},
/* M */	{-1,-2,-5,-3,-2, 0,-3,-2, 2, 0, 0, 4, 6,-2,_M,-2,-1, 0,-2,-1, 0, 2,-4, 0,-2,-1},
/* N */	{ 0, 2,-4, 2, 1,-4, 0, 2,-2, 0, 1,-3,-2, 2,_M,-1, 1, 0, 1, 0, 0,-2,-4, 0,-2, 1},
/* O */	{_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M, 0,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M},
/* P */	{ 1,-1,-3,-1,-1,-5,-1, 0,-2, 0,-1,-3,-2,-1,_M, 6, 0, 0, 1, 0, 0,-1,-6, 0,-5, 0},
/* Q */	{ 0, 1,-5, 2, 2,-5,-1, 3,-2, 0, 1,-2,-1, 1,_M, 0, 4, 1,-1,-1, 0,-2,-5, 0,-4, 3},
/* R */	{-2, 0,-4,-1,-1,-4,-3, 2,-2, 0, 3,-3, 0, 0,_M, 0, 1, 6, 0,-1, 0,-2, 2, 0,-4, 0},
/* S */	{ 1, 0, 0, 0, 0,-3, 1,-1,-1, 0, 0,-3,-2, 1,_M, 1,-1, 0, 2, 1, 0,-1,-2, 0,-3, 0},
/* T */	{ 1, 0,-2, 0, 0,-3, 0,-1, 0, 0, 0,-1,-1, 0,_M, 0,-1,-1, 1, 3, 0, 0,-5, 0,-3, 0},
/* U */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* V */	{ 0,-2,-2,-2,-2,-1,-1,-2, 4, 0,-2, 2, 2,-2,_M,-1,-2,-2,-1, 0, 0, 4,-6, 0,-2,-2},
/* W */	{-6,-5,-8,-7,-7, 0,-7,-3,-5, 0,-3,-2,-4,-4,_M,-6,-5, 2,-2,-5, 0,-6,17, 0, 0,-6},
/* X */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* Y */	{-3,-3, 0,-4,-4, 7,-5, 0,-1, 0,-4,-1,-2,-2,_M,-5,-4,-4,-3,-3, 0,-2, 0, 0,10,-4},
/* Z */	{ 0, 1,-5, 2, 3,-5, 0, 2,-2, 0, 0,-2,-1, 1,_M, 0, 3, 0, 0, 0, 0,-2,-6, 0,-4, 4}
};
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/* 
*/ 

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>



/* Tunable limits and scoring constants shared by all modules. */
#define MAXJMP		16	/* max jumps in a diag */
#define MAXGAP		24	/* don't continue to penalize gaps larger than this */
#define JMPS		1024	/* max jmps in an path */
#define MX		4	/* save if there's at least MX-1 bases since last jmp */

#define DMAT		3	/* value of matching bases */
#define DMIS		0	/* penalty for mismatched bases */
#define DINS0		8	/* penalty for a gap */
#define DINS1		1	/* penalty per base */
#define PINS0		8	/* penalty for a gap */
#define PINS1		4	/* penalty per residue */



/* One block of recorded jumps (gaps) for a diagonal. */
struct jmp {
	short		n[MAXJMP];	/* size of jmp (neg for dely) */
	unsigned short	x[MAXJMP];	/* base no. of jmp in seq x */
};					/* limits seq to 2^16 -1 */



struct diag { 








int 


score; 


/* score at last jmp */ 




long 


offset; 


/* offset of prev block */ 




short 


ijmp; 


/* current jmp index */ 


}; 


struct jmp jp; 


/* list of jmps */ 


/* Final gap list for one sequence, in the form the printing code walks. */
struct path {
	int	spc;		/* number of leading spaces */
	short	n[JMPS];	/* size of jmp (gap) */
	int	x[JMPS];	/* loc of jmp (last elem before gap) */
};


/*
 * Program-wide state shared by the alignment, printing and support modules.
 * Library routines (calloc, malloc, strcpy, ...) are declared by
 * <stdlib.h> and <string.h>, so they are not redeclared here.
 */
char		*ofile;			/* output file name */
char		*namex[2];		/* seq names: getseqs() */
char		*prog;			/* prog name for err msgs */
char		*seqx[2];		/* seqs: getseqs() */
int		dmax;			/* best diag: nw() */
int		dmax0;			/* final diag */
int		dna;			/* set if dna: main() */
int		endgaps;		/* set if penalizing end gaps */
int		gapx, gapy;		/* total gaps in seqs */
int		len0, len1;		/* seq lens */
int		ngapx, ngapy;		/* total size of gaps */
int		smax;			/* max score: nw() */
int		*xbm;			/* bitmap for matching */
long		offset;			/* current offset in jmp file */
struct diag	*dx;			/* holds diagonals */
struct path	pp[2];			/* holds path for seqs */

char		*getseq(), *g_calloc();
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/* Needleman-Wunsch alignment program 



 * usage: progs file1 file2

 * where file1 and file2 are two dna or two protein sequences.

 * The sequences can be in upper- or lower-case and may contain ambiguity

 * Any lines beginning with ';', '>' or '<' are ignored

 * Max file length is 65535 (limited by unsigned short x in the jmp struct)

 * A sequence with 1/3 or more of its elements ACGTU is assumed to be DNA

 * Output is in the file "align.out"
* 

* The program may create a tmp file in /tmp to hold info about traceback. 

* Original version developed under BSD 4.3 on a vax 8650 
*/ 

#include "nw.h"
#include "day.h"

> 

/*
 * Per-letter bit masks ('A'..'Z') used when the input is DNA: two letters
 * are treated as matching when their masks share a bit.
 */
static int	_dbval[26] = {
	1,14,2,13,0,0,4,11,0,0,12,0,3,15,0,0,0,5,6,8,8,7,9,0,10,0
};

/*
 * Per-letter bit masks ('A'..'Z') used when the input is protein.
 * B also carries the D and N bits, Z also carries the E and Q bits,
 * and J carries every bit.
 */
static int	_pbval[26] = {
	1, 2|(1<<('D'-'A'))|(1<<('N'-'A')), 4, 8, 16, 32, 64,
	128, 256, 0xFFFFFFF, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14,
	1<<15, 1<<16, 1<<17, 1<<18, 1<<19, 1<<20, 1<<21, 1<<22,
	1<<23, 1<<24, 1<<25|(1<<('E'-'A'))|(1<<('Q'-'A'))
};

main(ac,av) main 
int ac; 
char *av[]; 



{ 



prog = av[0]; 
if(ac!=3){ 

fprintf(stderr, M usage: %sfilel file2\n", prog); 

fprintf(stderr, "where filel and flle2 are two dna or two protein sequencesAn"); 
fprintf(stderr, ?, The sequences can be in upper- or lower-caseW); 
fprintf(stderr, M Any lines beginning with V or are ignored\n"); 
fprintfl[stdeiT,"Output is in the file \ n align.out\"\n"); .' 
exit(l); 

} 

namex[0] = av[l]; 

namex[l] = av[2]; 

seqx[0] = getseq(namex[0], &len0); 

seqx[l] = getseq(namex[l], &lenl); 

xbm = (dna)? _dbval : _pbval; 

endgaps = 0; /* 1 to penalize endgaps */ 

ofile = "align.out"; /*. output file */ 

nw(); /* fill in the matrix, get the possible jmps */ 

readjmpsO; /* get the actual jmps */ ( 

printO; /* print stats, alignment */ 



' cleanup(O); 
Page 1 of nw.c 



/♦ unlink any tmp files */ 
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/* do the alignment, return best score: main() 

* dna: values in Fitch and Smith, PNAS, 80, 1382-1386, 1983 

* pro: PAM 250 values 

* When scores are equal, we prefer mismatches to any gap, prefer 

* a new gap to extending an ongoing gap, and prefer a gap in seqx 

* to a gap in seq y. 
*/ 

nw0 
{ 



char 


*p*, *py; 


/* seqs and ptrs */ 


int 


*ndely, *dely; 


/* keep track of dely */ 


int 


ndelx, delx; 


/* keep track of delx */ 


int 


*tmp; 


/* for swapping rowO, rowl */ 


int 


mis; 


/* score for each type */ 


int 


insO, inst; 


/* insertion penalties */ 


register 


id; 


/* diagonal index */ 


register 


u; 


/* jmp index */ 


register 


*colO, *coll; 


/* score for curr, last row */ 


register 


xx, yy; 


/* index into seqs */ 



dx = (struct diag *)g_cailoc("to get diags", Ien0+Ienl + 1, sizeof(struct diag)); 

ndely = (int *)g_calloc("to get ndely", lenl + 1, sizeof(int)); 
dely = (int *)g_calloc("to get dely", len 1 + 1 , sizeof(int)); 
colO = (int *)g_caIioc("to get coIO", lenl+1, sizeof(int)); 
coll - (int *)g_calloc("to get coll", lenl+1, sizeof(int)); 
insO = (dna)? DINSO : PFNSO; 
insl -(dna)?DINSl :PFNS1; 

smax = -10000; 
if (endgaps) { 

for (col0[0] = dely[0] = -insO, yy = 1 ; yy <= lenl ; yy++) { 
col0[yy] = dely[yy] = col0[yy-l] - insl ; 
ndely[yy] = yy; 

} 

col0[0] = 0; /* Waterman Bull Math Biol 84 */ 

} 

else 

for (yy = 1 ; yy <= len 1 ; yy++) 
delyfyy] = -insO • 

/* fill in match matrix 
*/ 

for (px = seqx[0], xx = 1 ; xx <= lenO; px++, xx+-f) { 
/♦ initialize first entry in col 
*/ 

if (endgaps) { 

if(xx = l) 

col 1 [0] = delx = -(insO+ins 1 ); 

else 

col 1 [0] = delx = col0[0] - ins 1 ; 
ndelx = xx; 

} 

else { 

coll[0] = 0; 
delx = -insO; 
ndelx = 6; 

} 
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...nw 

for (py = seqx[I], yy = 1 ; yy <= lenl ; py++, yy++) { 
mis = col0[yy-l]; 
if(dna) 

mis += (xbm[*px-VV]&xbm[*py-'A'])? DMAT : DMIS; 

else 

mis += _day[*px- , A'][*py-'A']; 

/* update penalty for del in x seq; 

* favor new del over ongong del 

* ignore MAXGAP if weighting endgaps 
*/ 

if (endgaps || ndely[yy] < MAXGAP) { 

if (col0[yy] - insO >= dely[yy]) { 

dely[yy] = col0[yy] - (insO+insl); 
ndely[yy] = I ; 



|J } else { 

fU. dely[yy] -= insl; 

SJ ndely[yy]++; 

« } else { 

H! if (colOfyy] - (insO+insl) >= dely[yy]) { 

dely[yy] = colOfyy] - (insO+insl); 

s ndely[yy] = 1 ; 

M } else 

ft! ndely[yy]++; 



/* update penalty for del in y seq; 

* favor new del over ongong del 
*/ 

if (endgaps || ndelx < MAXGAP) { 

if(coll[yy-l]-insO >= delx) { 

delx = coll [yy-1] - (insO+insl); 
ndelx = 1 ; 

} else { 

delx -= insl; 
ndelx++; 

} 

} else { 

if (col 1 [yy- 1 ] - (insO+ins 1 ) >= delx) { 

delx = coll [yy-1] - (insO+insl); 
ndelx = 1 ; 

} else 

ndelx++; 

} 

/* pick the maximum score; we're favoring 

* mis over any del and delx over dely 
*/ 



Page 3 of nw.c 



FIGURE 20 F 



id = xx - yy + len 1 - 1 ; 

if (mis >= delx && mis >= deiy[yy]) 

col 1 [yy] - mis; 
else if (delx >= dely[yy]) { 

col 1 [yy] = delx; 

ij = dx[id].ijmp; 

if (dx[id].jp.n[0] && (!dna || (ndelx >= MAXJMP 
&& xx > dx[id].jp.x[ij]+MX) || mis > dx[id].score+DINSO)) { 
dx[id].ijmp++; 
if(++ij>= MAXJMP) { 
writejmps(id); 
ij = dx[id].ijmp = 0; 
dx[id].offset = offset; 

offset += sizeof(struct jmp) + sizeof(offset); 

} 

} 

dx[id].jp.n[ij] = ndelx; 
dx[id].jp.x[ij] = xx; 
dx[id].score = delx; 

} 

else { 

coll[yy] = dety[yy]; 
ij = dx[id].ijmp; 

if (dx[id]jp.n[0] && (!dna || (ndely[yy] >= MAXJMP 

&& xx > dx[id].jp.x[ij]+MX) || mis > dx[id].score+DrNSO)) { 
dx[id].ijmp++; 
if(++ij>= MAXJMP) { 
writejmps(id); 
ij - dx[id].ijmp = 0; 
dx[id].offset = offset; 

offset += sizeof(struct jmp) + sizeof(offset); 

} 

} 

dx[id] jp.n[ij] = -ndely[yy]; 
dx[id] jp.x[ij] = xx; 
dx[id].score = dely[yy]; 

} 

if(xx = len0 &&yy<lenl) { 
/* last col 
*/ 

if (endgaps) 

collfyy] -= insO+insl*(leni-yy); 
if (coll[yy]>smax) { 

smax = col I [yy]; 

dmax = id; 

} 

} 

} 

if (endgaps && xx<len0) 

col I [yy- 1 ] — insO+ins 1 *(len0-xx); 
if (coll [yy-l]> smax) { 

smax = coll [yy-1]; 

dmax = id; 

} 

tmp = colO; colO = coll; coll = tmp; 

} 

(void) free((char *)ndely); 
(void) free((cbar *)dely); 
(void) free((char *)co!0); 

(void) free((char *)colt);} Page 4 of nw.c 



5 B 

pa 



FTGURE 20 G 



* print() — only routine visible outside this module 
* 

* static: 

* getmat() trace back best path, count matches: print() 

* pr_al>gn() P nnt alignment of described in array p[]: print() 

* dumpblock() -- dump a block of lines with numbers, stars: pr_align() 

* nums() - put out a number line: dumpblock() 

* putline() -- put out a line (name, [num], seq, [num]): dumpblock() 

* stars() - -put a line of stars: dumpblock() 

* stripnameO -- strip any path and prefix from a seqname 
*/ 

#include "nw.h"
#define SPC	3
#define P_LINE	256	/* maximum output line */
#define P_SPC	3	/* space between name or num and seq */

extern int	_day[26][26];	/* substitution scores, defined with the table */
int	olen;			/* set output line length */
FILE	*fx;			/* output file */



print() 
{ 



LsJ nrintH pFlDt 

int Ix, ly, firstgap, lastgap; /* overlap */ 

if ((fx = fopen(ofile, "w")) 0) { 

fprintf(stderr,"%s: can't write %s\n", prog, ofile); 
cleanup(l); 

fprintf(fx, "<first sequence: %s (length = %d)\n", namex[0], lenO); 
fprintf(fx, "<second sequence: %s (length = %d)\n", namexfl], lenl); 
olen = 60; 
lx = lenO; 
ly = lenl ; 

firstgap = lastgap = 0; 

if (dmax < len 1 - 1 ) { /* leading gap in x */ 

pp[0J.spc = firstgap = lenl - dmax - 1; 
ly pp[0].spc; 

else if (dmax > lenl - I) { /* leading gap in y V 
pp[l].spc = firstgap = dmax - (lenl - 1); 
lx -= pp[l] spc; 

} 

if (dmaxO < lenO - 1) { /* trailing gap in x */ 
lastgap - lenO - dmaxO -1; 
lx -= lastgap; 

else if (dmaxO > lenO - I) { /* trailing gap in y */ 
lastgap = dmaxO - (lenO - 1 ); 
ly -= lastgap; 

} 

getmat(lx, ly, firstgap, lastgap); 
pr_align(); 
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/* 

* trace back the best path, count matches 
*/ 

static 

getmatflx, ly, firstgap, lastgap) 



getmat 



int lx, ly; 

int firstgap, lastgap; 



/* "core" (minus endgaps) */ 
/* leading trailing overlap */ 



int 

char 

double 

register 

register char 



nO, nl; 
*p0, *pl; 



nm, iO, il, sizO, sizl; 



outx[32]; 
pet; 



/* get total matches, score 



Q 




iO = il = sizO = sizl =0; 
pO = seqx[0] + pp[l]spc; 
pi = seqx[l] + pp[0].spc; 
nO = pp[l]spc + 1; 
nl = pp[0].spc + I; 

nm = 0; 

while ( *p0 && *pl ) { 
if (sizO) { 

pl++; 
nl++; 
sizO--; 

} 

else if (sizl) { 

p(H+; 

nOH-; 
sizl-; 

} 

else { 

if (xbm[*pO , A , ]&xbm[*pl- , A*]) 

nm++; 
if(n04+ = pp [0].x[i0]) 

sizO = pp[0].n[i(HH-]; 
if(nl++ = pp[l].x[il]) 

sizl =pp[l].n[il-H-]; 

p0++; 
pl++; 



/* pet homology: 

* if penalizing endgaps, base is the shorter seq 

* else, knock off overhangs and take shorter cc 
*/ 

if (endgaps) 

lx = (lenO<Ienl)? lenO: lenl ; 



lx « (lx < ly)? lx : ly; 
pct= 100.*(double)nm/(double)lx; 
fprintf(fx, rt \n M ); 

fprintf(fx, "<%d match%s in an overlap of %d: %.2f percent similarity\ri", 
nrn ,(nm=l)?"":"es",Ix,pct); 



} 



else 
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fprintf(fx, "<gaps in first sequence: %d", gapx); 
if (gapx) { 

(void) sprintf(outx, " (%d %s%s)", 

ngapx, (dna)? "baseV'residue", (ngapx = 1)? "Vs"); 
fprint^fx/^/oS", outx); 



..getmat 



y 



fprintf(fx, gaps in second sequence: %d", gapy); 

if(gapy){ 

(void) sprintfl[outx, " (%d %s%s)", 

ngapy, (dna)? "baseV'residue", (ngapy == 1)? "":"s"); 
fprintf(fx,"%s", outx); 



if (dna) 



else 



fprintf(fx, 

"\n<score: %d (match = %d, mismatch = %d, gap penalty = %d + %d per base)Vn" 
smax, DM AT, DMIS, DFNSO, DfNSl); 



fprintf(fx, 

"\n<score: %d (Dayhoff PAM 250 matrix, gap penalty = %d + %d per residue)\n" 
smax,PINSO,PrNSl); 
if (endgaps) 

fprintf(fx, 

"<endgaps penalized, left endgap: %d %s%s, right endgap: %d %s%s\n", 
firstgap, (dna)? "base" : "residue", (firstgap — 1)? "" : "s", 
lastgap, (dna)? "base" : "residue", (lastgap — 1)? "" : "s"); 



else 



fprintf(fx, "<endgaps not penalizedV); 



III 



static nm; /* matches in core -- for checking */ 

static Imax; /* lengths of stripped file names */ 

static ij[2]; /* jmp index for a path */ 

static nc[2]; /* number at start of current line */ 

static ni[2J; /* current elem number -- for gapping V 

static siz[2]; 

static char *ps[2]; /* ptr to current element */ 

static char *P°[2]; /* ptr to next output char slot */ 

static char out[2][P_LINE]; /* output line */ 

static char star[P_LfNE]; /* set by starsQ */ 



/* 

* print alignment of described in struct path pp[] 
*/ 

static 

pr_align() 
{ 



int 
int 

register 



nn; 

more; 

i; 



/* char count */ 



pralign 



for(i= 0, Imax = 0; i < 2; i++) { 

nn = stripname(namex[i]); 
if (nn > Imax) 

Imax = nn; 



nc[i]=l; 
ni[i]-l; 
siz[i] - = 0; 
ps[i] = seqx[i]; 
po[i] =out[i]; 



} 
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for (nn = nm = 0, more = 1 ; more; ) { ...pr_align 
for (i = more = 0; i < 2; i>+) { 
/* 

* do we have more of this sequence? 
*/ 

if (!*ps[i]) 

continue; 

more++; 

if (pp[i].spc) { /* leading space */ 
*po[i]++ = "; 
pp[i].spc«; 

} 

else if (siz[i]) { /* in a gap */ 
*po(i]+t = 
siz[i)~; 

) 

else { /* we're putting a seq element 

*/ 

•po[i] = *ps[i]; 
if (is!ower(*ps[i])) 

*ps[i] = toupper(*ps[i]); 

po[i]++; 
ps[i]++; 

/* 

* are we at next gap for this seq? 
*/ 

if(ni[i] = pp[i].x[ij[i]]){ 
/* 

* we need to merge all gaps 

* at this location 
*/ 

siz[i]-pp[i]n[ij[i]++]; 
while (ni[i]== pp[i].x[ij[i]]) 

siz[i] += pp[i]n[ij[i]++]; 

} 

ni[i]++; 

I 

} 

if (++nn — olen |j !more && nn) { 
dumpblock(); 
for(i = 0; i < 2; 

po[iJ = out[iJ; 

nn = 0; 



} 

/* 

* dump a block oflines, including numbers, stars: pr_align() 
*/ 

static 

dumpblock() dumpblock 
( 

register i; 

for (i = 0; i < 2; i++) 

*po[i]~ = '\0'; 
Page 4 of nwprint.c 




FTGURE 20 K 

...dumpblock 



(void) putc(V, fx); 
for (i = 0; i < 2; i++) { 

if (*out[i] && (*out[i] != * ' || *(po[i]) != ' ')) { 
if (.-0) 

nums(i); 
if(i = 0&& *out[l]) 

stars(); 

putline(i); 

if (i = 0 && *out[l]) 

fprintf(fx, star); 

if (.— 1) 

nums(i); 

} 

} 

U 1 

Q * put out a number line: dumpblock() 

m */ 

sj static 

_T; nums(ix) . 
Ut int ix; /* index in out[] holding seq line */ 

=75 char nline[P_LFNE]; 

register i,j; 
s register char *pn, *px, *py; 

fy for (pn = nline, i = 0; i < lmax+P_SPC; i++, pn++) 

*pn = ' '; 

for (i = nc[ix], py = out[ix]; *py; py++, pn++) { 

if(*py =="ll *py = v ) 



else { 



*pn = ' '; 

if (i%10 == 0 || (i = 1 && nc[ix] != 1)) { 
j - (i < 0)? -i:i; 
for(px-pn;j; j/= 10, px~) 
♦px^jyolO-i-'O*; 

if(i<0) 



} 

else 

i++; 



*pn = * ' 



} 

> 

*pn = 'NO'; 
nc[ix] = i; 

for (pn = nline; *pn; pn++) 

(void) putc(*pn, fx); 
(void) putc('\n\ fx); 



nums 



* put out a line (name, [num], seq, [num]): dumpblock() 
*/ 

static putline 
putline(ix) 

int ix; 

{ 
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...put line 



int . i; 
register char *px; 

for (px = namex[ix], i = 0; *px && *px != V; px++, i-H-) 

(void) putc(*px, fx); 
for (; i < lmax+P_SPC; i++) 

(void) putc(' fx); 

/* these count from 1 : 

* ni[] is current element (from 1) 

* nc[] is number at start of current line 
*/ 

for (px = out[ix]; *px; px++) 

(void) putc(*px&0x7F, fx); 
(void) putcCW, fx); 



^ * put a line of stars (seqs always in out[0], out[l]): dumpblockQ 

Cl static 

starsQ stars 



{ 



int i; 

register char *p0, *pl , cx, *px; 

if (! *out[0] || (*out[0] = • ' && *(po[0]) = 1 ') || 
!*out[l] || (*out[l] = && *(po[l]) = ' ')) 
return; 

px = star; 

for (i = lmax+P_SPC; i; i-) 
*px++ = "; , 

for (pO = out[0], pi = out[l]; *p0 && *pl ; p0++, pl++) { 
if (isalpha(*pO) && isalpha(*pl)) { 

if (xbmt*pO- , A , ]&xbm[*pl -'A']) { 
cx = ? * f ; 
nm++; 

} 

else if (!dna && .day^pO-WPpl-W] > 0) 

cx = V; 

else 

cx = "; 

} 

else 

cx = 1 '; 
*px++ = cx; 

} 

*px++ = *\n*; 
*px = W; 
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/* ^ 

* strip path or prefix from pn, return len: pr_align() 
*/ 

static int
stripname(pn)
	char	*pn;	/* file name (may be path) */
{
	register char	*px, *py;

	/* find the character after the last '/' */
	py = 0;
	for (px = pn; *px; px++)
		if (*px == '/')
			py = px+1;
	/*
	 * Shift the base name to the front of the buffer.  Source and
	 * destination overlap, so memmove() is required; strcpy() on
	 * overlapping buffers is undefined.
	 */
	if (py)
		(void) memmove(pn, py, strlen(py)+1);
	return((int)strlen(pn));
}
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* cleanup() — cleanup any tmp file 

* getseqQ -- read in seq, set dna, len, maxlen 

* g_calloc() — calloc() with error checkin 

* readjmpsQ - get the good jmps, from tmp file if necessary 

* writejmps() -- write a filled array of jmps to a tmp file: nw() 
V 

#include "nw.h"
#include <sys/file.h>



/*
 * jname is a writable array, not a pointer to a string literal:
 * mktemp() rewrites the trailing XXXXXX in place.
 */
char	jname[] = "/tmp/homgXXXXXX";	/* tmp file for jmps */
FILE	*fj;				/* stream on the tmp file; 0 until first write */

int	cleanup();			/* cleanup tmp file */
long	lseek();

/* 

* remove any tmp file if we blow 
*/ 

cteanup(i) 
{ 



int i; 



exit(i); 



(void) unlink(jname); 



/* tmp file for jmps */ 
/* cleanup tmp file */ 



cleanup 



* read, return ptr to seq, set dna, len, maxlen 

* skip lines starting with '<', or '>' 

* seq in upper or lower case 
V 

char * 

getseq(file, len) 

char *file; /* file name */ 
int *len; /* seq len */ 



char 

register char 

int 

FILE 



line[1024], *pseq; 
*px, *py; 
natgc, tlen; 

*fp; 



getseq 



if((fp=fopen(file,V))==0){ 

fprintf(stden\"%s: can't read %s\n", prog, file); 
exit(l); 

} 

tlen = natgc = 0; 

while (fgets(line, 1024, fp)) { 

if (*Iine == ';' || *line '<' || *line = V) 

continue; 
for (px = line; *px != VT; px++) 

if (isupper(*px) || islower(*px)) 
tlen-H-; 



if ((pseq = malIoc((unsigned)(tlen+6))) == 0) { 

fprintf(stderr,"%s: malloc() failed to get %d bytes for %s\n", prog, tlen+6, file); 
exit(l); 

} 

pseq[0] = pseq[l] = pseq[2] = pseq[3] = '\0'; 



Page I of nwsubr.c 




py = pseq + 4; 
*len = tlen; 
rewind(fp); 
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...getseq 



while (fgets(line, 1024, fp)) { 

if(*line = V II •line — || *li n e = '>') 

continue; 
for (px - line; *px != *\n'; px++) { 
if (isupper(*px)) 

♦py-f-f = *p X ; 
else if (islower(*px)) 

*py++ = toupper(*px); 
if (index(" ATGCU\*(py- 1 ))) 
natgc++; 



} 

U *py++ = '\0'; 

L% *py = rN0 '; 

™ (void) fclose(fp); 

C3 dna - natgc > (tIen/3); 



si 1 



return(pseq+4); 



char * 

g_calIoc(msg, nx, sz) g ca ||oc 

char *msg; /* program, calling routine */ 

int nx, sz; /* number and size of elements */ 



char *px, *ca!loc(); 



{ 

ru 

M if ((px = calloc((unsigned)nx, (unsigoed)sz)) — 0) { 
ifl if(*msg){ 

f==4 fprintf(stderr, "%s: g_ca!loc() failed %s (n=%d, sz=%d)\n", prog, msg, nx, sz); 

H 1 exit(l); 

} 

return(px); 

} 

/* 

* get final jmps from dx[] or tmp file, set pp[], reset dmax: main() 
*/ * 

readjmps() readjmps 

int fd = -l; 

int siz, iO, il; 

register i,j, xx; 

if (OH 

(void) fclose(fj); 

if ((fd - openGname, 0_RDONLY, 0)) < 0) { 

fprintf(stderr, "%s: can't openQ %s\n", prog, jname); 
cleanup(l); 

i 

> 

for (i = iO = i 1 = 0, dmaxO = dmax, xx = lenO; ; i++) { 
while (1){ 

for (j = dx[dmax].ijmp; j >= 0 && dx[dmax].jp.x[j] >= xx; j--) 
> 
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...readjmps 

if 0 < 0 && dx[dmax].ofTset && fj) { 

(void) lseek(fd, dx[dmax).offset, 0); 

(void) read(fd, (char *)&dx[dmax] jp, sizeof(struct jmp)); 

(void) read(fd, (char *)&dx[dmaxj. offset, sizeof(dx[dmax].offset)); 

dx[dmax].ijmp = MAXJMP-l; 

} 

else 

break; 

} 

if(i >=JMPS){ 

fprintf(stderr, "%s: too many gaps in alignment^", prog); 
cleanup(l); 

} 

if (j >=<>){ 

siz = dx[dmax].jp.n[j]; 
xx = dx[dmax].jp.x[j]; 
dmax += siz; 

if (siz < 0) { /* gap in second seq */ 

pp[l].n[il] = -siz; 
xx += siz; 

/* id = xx - yy + lenl - 1 
*/ 

pp[ 1 ].x[i I ] = xx - dmax + len 1 - 1 ; 
gapy++; 
ngapy -= siz; 
/* ignore MAXGAP when doing endgaps */ 

siz = (-siz < MAXGAP || endgaps)? -siz : MAXGAP; 

U4-+; 

} 

else if (siz > 0) { /* gap in first seq */ 
pp[0].n[i0] = siz; 
pp[0].x[iO] = xx; 
gapx++; 
ngapx += siz; 
/* ignore MAXGAP when doing endgaps */ 

siz = (siz < MAXGAP || endgaps)? siz : MAJCGAP; 
i0++; 

} 

} 

else 

break; 

} 

/* reverse the order of jmps 
*/ 

for 0 = 0, i0--;j<i0;j++, i0~) { 

i - pp[0].nD]; pp[0).n[j] = PP[0] n[i0]; pp[0].n[i0] = i; 
i = pp[Q].x[j]; PP[0)-x[j] = PP[0] x[i0]; PP [0].x[i0] = i; 

} 

for(j = 0,il«;j<il;j++,n-){ 

i = PP[l].nDl; PP[1] n[j] = PP[i] n[ii]; pp[$] -n[il] = i; 
» - PP[l] xO); PP[l].xM - pp[l]-x[il]; PP[1] x[il] - i; 

} 

if(fd>= 0) 

(void) close(fd); 

if(fj){ 

(void) unlink(jname); 
fj = 0; 
offset = 0; 
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/* 

* write a filled jmp struct offset of the prev one (if any): nw() 
. */ 

writejmps(ix) 

int ix; 

{ 

char *mktempO; 
if(!®{ 

if (mktempftname) < 0) { 

fprintfl[stderr, "%s: can't mktempO %s\n", prog, jname); 
cleanup(l); 

} 

if ((fj = fopenOname, M w")) = 0) { 

fprintf(stderr, "%s: can't write %s\n", prog, jname); 
exit(l); 

} 

} 

(void) fwrite((char *)&dx[ix]jp, sizeof(stmct jmp), 1, fj); 
(void) fwrite((char *)&dx[ix] .offset, sizeof(dx[ix]. offset), 1, fj); 

} 
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